This notebook is an analysis of the Google Play Store.
Data Source:
App and review data were scraped from the Google Play Store by Lavanya Gupta in 2018. The original files are listed here.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
# Render floats in DataFrame output with thousands separators and two decimals.
pd.options.display.float_format = '{:,.2f}'.format
# Load the app data from apps.csv and inspect its (rows, columns) dimensions.
df_apps = pd.read_csv('apps.csv')
df_apps.shape
(10841, 12)
# List the column names of the raw data.
df_apps.columns
Index(['App', 'Category', 'Rating', 'Reviews', 'Size_MBs', 'Installs', 'Type',
'Price', 'Content_Rating', 'Genres', 'Last_Updated', 'Android_Ver'],
dtype='object')
# Look at five randomly chosen rows.
df_apps.sample(5)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | Last_Updated | Android_Ver | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3743 | 哈哈姆特不EY | COMMUNICATION | NaN | 239 | 18.00 | 10,000 | Free | 0 | Everyone | Communication | July 31, 2018 | 4.4 and up |
| 5397 | BEBONCOOL GAMEPAD V1.0 | GAME | 3.90 | 404 | 2.20 | 100,000 | Free | 0 | Everyone | Arcade | August 30, 2017 | 4.0 and up |
| 7046 | Yandex.Shell (Launcher+Dialer) | PERSONALIZATION | 4.40 | 87300 | 6.15 | 1,000,000 | Free | 0 | Everyone | Personalization | March 17, 2015 | Varies with device |
| 5116 | Weather From DMI/YR | WEATHER | 4.30 | 2143 | 4.75 | 100,000 | Free | 0 | Everyone | Weather | July 31, 2018 | Varies with device |
| 6965 | CppDroid - C/C++ IDE | FAMILY | 4.10 | 29978 | 19.00 | 1,000,000 | Free | 0 | Everyone | Education | August 17, 2017 | Varies with device |
# Work on a copy of the data without the Last_Updated and Android_Ver columns.
df_apps_new = df_apps.drop(["Last_Updated", "Android_Ver"], axis=1)
# Count the rows whose Rating is missing.
df_apps_new["Rating"].isna().sum()
1474
# Show the rows that have no rating. The mask returned by isna() is already
# boolean, so it is used directly instead of being compared to True.
df_apps_new[df_apps_new["Rating"].isna()]
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Ak Parti Yardım Toplama | SOCIAL | NaN | 0 | 8.70 | 0 | Paid | $13.99 | Teen | Social |
| 1 | Ain Arabic Kids Alif Ba ta | FAMILY | NaN | 0 | 33.00 | 0 | Paid | $2.99 | Everyone | Education |
| 2 | Popsicle Launcher for Android P 9.0 launcher | PERSONALIZATION | NaN | 0 | 5.50 | 0 | Paid | $1.49 | Everyone | Personalization |
| 3 | Command & Conquer: Rivals | FAMILY | NaN | 0 | 19.00 | 0 | NaN | 0 | Everyone 10+ | Strategy |
| 4 | CX Network | BUSINESS | NaN | 0 | 10.00 | 0 | Free | 0 | Everyone | Business |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5840 | Em Fuga Brasil | FAMILY | NaN | 1317 | 60.00 | 100,000 | Free | 0 | Everyone | Simulation |
| 5862 | Voice Tables - no internet | PARENTING | NaN | 970 | 71.00 | 100,000 | Free | 0 | Everyone | Parenting |
| 6141 | Young Speeches | LIBRARIES_AND_DEMO | NaN | 2221 | 2.40 | 500,000 | Free | 0 | Everyone | Libraries & Demo |
| 7035 | SD card backup | TOOLS | NaN | 142 | 3.40 | 1,000,000 | Free | 0 | Everyone | Tools |
| 7175 | Android TV Remote Service | TOOLS | NaN | 1 | 3.70 | 1,000,000 | Free | 0 | Everyone | Tools |
1474 rows × 10 columns
# Keep only the rows with no missing value in any column, then spot-check a few.
df_apps_clean = df_apps_new.dropna(axis=0, how="any")
df_apps_clean.sample(n=5)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 4143 | Blue CareOnDemand | MEDICAL | 4.40 | 163 | 40.00 | 10,000 | Free | 0 | Everyone | Medical |
| 6518 | Elmo Calls by Sesame Street | FAMILY | 3.90 | 6903 | 25.00 | 1,000,000 | Free | 0 | Everyone | Educational;Pretend Play |
| 5598 | Lapse It • Time Lapse • Pro | PHOTOGRAPHY | 4.30 | 12865 | 6.90 | 100,000 | Paid | $2.99 | Everyone | Photography |
| 8762 | iHoroscope - 2018 Daily Horoscope & Astrology | LIFESTYLE | 4.50 | 398307 | 19.00 | 10,000,000 | Free | 0 | Everyone | Lifestyle |
| 9742 | Five Nights at Freddy's 3 Demo | GAME | 4.50 | 1041836 | 50.00 | 10,000,000 | Free | 0 | Teen | Action |
# Count the rows that repeat an earlier row in every column.
df_apps_clean.duplicated().sum()
476
# Rows that share the same app name, type and price are treated as the same
# app; only the first occurrence of each is kept.
df_apps_clean = df_apps_clean.drop_duplicates(
    subset=["App", "Type", "Price"],
    keep="first",
)
df_apps_clean.shape
(8199, 10)
# Restrict to apps with more than 100 reviews and list the ten best rated.
df_apps_clean_manyreviews = df_apps_clean.loc[df_apps_clean.Reviews > 100]
df_apps_clean_manyreviews.sort_values("Rating", ascending=False).head(n=10)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 3115 | Oración CX | LIFESTYLE | 5.00 | 103 | 3.80 | 5,000 | Free | 0 | Everyone | Lifestyle |
| 2095 | Ríos de Fe | LIFESTYLE | 5.00 | 141 | 15.00 | 1,000 | Free | 0 | Everyone | Lifestyle |
| 2438 | FD Calculator (EMI, SIP, RD & Loan Eligilibility) | FINANCE | 5.00 | 104 | 2.30 | 1,000 | Free | 0 | Everyone | Finance |
| 4791 | Tickets SDA 2018 and Exam from the State Traff... | AUTO_AND_VEHICLES | 4.90 | 10479 | 33.00 | 100,000 | Free | 0 | Everyone | Auto & Vehicles |
| 2650 | DC Comics Amino | SOCIAL | 4.90 | 117 | 63.00 | 1,000 | Free | 0 | Teen | Social |
| 4903 | CDL Practice Test 2018 Edition | AUTO_AND_VEHICLES | 4.90 | 7774 | 17.00 | 100,000 | Free | 0 | Everyone | Auto & Vehicles |
| 2996 | H*nest Meditation | LIFESTYLE | 4.90 | 145 | 48.00 | 5,000 | Paid | $1.99 | Mature 17+ | Lifestyle |
| 6476 | ipsy: Makeup, Beauty, and Tips | BEAUTY | 4.90 | 49790 | 14.00 | 1,000,000 | Free | 0 | Everyone | Beauty |
| 3419 | DQSalmaan - A fan made App | FAMILY | 4.90 | 707 | 6.00 | 10,000 | Free | 0 | Everyone | Entertainment |
| 7127 | StrongLifts 5x5 Workout Gym Log & Personal Tra... | HEALTH_AND_FITNESS | 4.90 | 66791 | 10.00 | 1,000,000 | Free | 0 | Everyone | Health & Fitness |
# Five largest apps by Size_MBs.
df_apps_clean.sort_values("Size_MBs", ascending=False).head(5)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 9942 | Talking Babsy Baby: Baby Games | LIFESTYLE | 4.00 | 140995 | 100.00 | 10,000,000 | Free | 0 | Everyone | Lifestyle;Pretend Play |
| 10687 | Hungry Shark Evolution | GAME | 4.50 | 6074334 | 100.00 | 100,000,000 | Free | 0 | Teen | Arcade |
| 9943 | Miami crime simulator | GAME | 4.00 | 254518 | 100.00 | 10,000,000 | Free | 0 | Mature 17+ | Action |
| 9944 | Gangster Town: Vice District | FAMILY | 4.30 | 65146 | 100.00 | 10,000,000 | Free | 0 | Mature 17+ | Simulation |
| 3144 | Vi Trainer | HEALTH_AND_FITNESS | 3.60 | 124 | 100.00 | 5,000 | Free | 0 | Everyone | Health & Fitness |
# Five apps with the highest number of reviews.
df_apps_clean.sort_values("Reviews", ascending=False).head(n=5)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 10805 | Facebook | SOCIAL | 4.10 | 78158306 | 5.30 | 1,000,000,000 | Free | 0 | Teen | Social |
| 10785 | WhatsApp Messenger | COMMUNICATION | 4.40 | 69119316 | 3.50 | 1,000,000,000 | Free | 0 | Everyone | Communication |
| 10806 | Instagram | SOCIAL | 4.50 | 66577313 | 5.30 | 1,000,000,000 | Free | 0 | Teen | Social |
| 10784 | Messenger – Text and Video Chat for Free | COMMUNICATION | 4.00 | 56642847 | 3.50 | 1,000,000,000 | Free | 0 | Everyone | Communication |
| 10650 | Clash of Clans | GAME | 4.60 | 44891723 | 98.00 | 100,000,000 | Free | 0 | Everyone 10+ | Strategy |
# Number of apps per content rating, most common first.
ratings = df_apps_clean["Content_Rating"].value_counts()
ratings
Everyone 6621 Teen 912 Mature 17+ 357 Everyone 10+ 305 Adults only 18+ 3 Unrated 1 Name: Content_Rating, dtype: int64
# Donut chart of the share of apps in each content rating.
# `names` supplies the sector labels. Plotly Express' `labels` argument is a
# dict that renames columns for display, so it is not given the index here.
fig = px.pie(
    names=ratings.index,
    values=ratings.values,
    title="Content Rating Distribution",
    hole=0.6,
)
# Put the percentage and the rating name outside each sector.
fig.update_traces(textposition='outside', textinfo='percent+label')
fig.show()
# Check the column dtypes before doing any numeric work.
df_apps_clean.dtypes
App object Category object Rating float64 Reviews int64 Size_MBs float64 Installs object Type object Price object Content_Rating object Genres object dtype: object
# Tally the apps per distinct Installs value.
df_apps_clean.Installs.value_counts()
1,000,000 1417 100,000 1096 10,000 988 10,000,000 933 1,000 698 5,000,000 607 500,000 504 50,000 457 5,000 425 100 303 50,000,000 202 500 199 100,000,000 189 10 69 50 56 500,000,000 24 1,000,000,000 20 5 9 1 3 Name: Installs, dtype: int64
# Installs holds strings such as '1,000,000': strip the thousands separators
# (a literal match, no regex needed) and convert the column to numbers.
df_apps_clean["Installs"] = df_apps_clean["Installs"].str.replace(',', '', regex=False)
df_apps_clean["Installs"] = pd.to_numeric(df_apps_clean["Installs"])
# Number of apps in each install bracket.
df_apps_clean.groupby('Installs')[['App']].count()
| App | |
|---|---|
| Installs | |
| 1 | 3 |
| 5 | 9 |
| 10 | 69 |
| 50 | 56 |
| 100 | 303 |
| 500 | 199 |
| 1000 | 698 |
| 5000 | 425 |
| 10000 | 988 |
| 50000 | 457 |
| 100000 | 1096 |
| 500000 | 504 |
| 1000000 | 1417 |
| 5000000 | 607 |
| 10000000 | 933 |
| 50000000 | 202 |
| 100000000 | 189 |
| 500000000 | 24 |
| 1000000000 | 20 |
# Peek at the raw Price values before cleaning them.
df_apps_clean["Price"].head()
21 0 28 $1.49 47 $0.99 82 0 99 0 Name: Price, dtype: object
# Strip the currency symbol and convert Price to a number.
# '$' has to be matched literally: as a regular expression it is the
# end-of-string anchor, so with regex=True pandas >= 2.0 removes nothing and
# pd.to_numeric then fails on values such as '$1.49'.
df_apps_clean.Price = df_apps_clean.Price.str.replace('$', '', regex=False)
df_apps_clean.Price = pd.to_numeric(df_apps_clean.Price)
# Twenty most expensive apps.
df_apps_clean.sort_values(by="Price", ascending=False).head(20)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 3946 | I'm Rich - Trump Edition | LIFESTYLE | 3.60 | 275 | 7.30 | 10000 | Paid | 400.00 | Everyone | Lifestyle |
| 2461 | I AM RICH PRO PLUS | FINANCE | 4.00 | 36 | 41.00 | 1000 | Paid | 399.99 | Everyone | Finance |
| 4606 | I Am Rich Premium | FINANCE | 4.10 | 1867 | 4.70 | 50000 | Paid | 399.99 | Everyone | Finance |
| 3145 | I am rich(premium) | FINANCE | 3.50 | 472 | 0.94 | 5000 | Paid | 399.99 | Everyone | Finance |
| 3554 | 💎 I'm rich | LIFESTYLE | 3.80 | 718 | 26.00 | 10000 | Paid | 399.99 | Everyone | Lifestyle |
| 5765 | I am rich | LIFESTYLE | 3.80 | 3547 | 1.80 | 100000 | Paid | 399.99 | Everyone | Lifestyle |
| 1946 | I am rich (Most expensive app) | FINANCE | 4.10 | 129 | 2.70 | 1000 | Paid | 399.99 | Teen | Finance |
| 2775 | I Am Rich Pro | FAMILY | 4.40 | 201 | 2.70 | 5000 | Paid | 399.99 | Everyone | Entertainment |
| 3221 | I am Rich Plus | FAMILY | 4.00 | 856 | 8.70 | 10000 | Paid | 399.99 | Everyone | Entertainment |
| 3114 | I am Rich | FINANCE | 4.30 | 180 | 3.80 | 5000 | Paid | 399.99 | Everyone | Finance |
| 1331 | most expensive app (H) | FAMILY | 4.30 | 6 | 1.50 | 100 | Paid | 399.99 | Everyone | Entertainment |
| 2394 | I am Rich! | FINANCE | 3.80 | 93 | 22.00 | 1000 | Paid | 399.99 | Everyone | Finance |
| 3897 | I Am Rich | FAMILY | 3.60 | 217 | 4.90 | 10000 | Paid | 389.99 | Everyone | Entertainment |
| 2193 | I am extremely Rich | LIFESTYLE | 2.90 | 41 | 2.90 | 1000 | Paid | 379.99 | Everyone | Lifestyle |
| 3856 | I am rich VIP | LIFESTYLE | 3.80 | 411 | 2.60 | 10000 | Paid | 299.99 | Everyone | Lifestyle |
| 2281 | Vargo Anesthesia Mega App | MEDICAL | 4.60 | 92 | 32.00 | 1000 | Paid | 79.99 | Everyone | Medical |
| 1407 | LTC AS Legal | MEDICAL | 4.00 | 6 | 1.30 | 100 | Paid | 39.99 | Everyone | Medical |
| 2629 | I am Rich Person | LIFESTYLE | 4.20 | 134 | 1.80 | 1000 | Paid | 37.99 | Everyone | Lifestyle |
| 2481 | A Manual of Acupuncture | MEDICAL | 3.50 | 214 | 68.00 | 1000 | Paid | 33.99 | Everyone | Medical |
| 4264 | Golfshot Plus: Golf GPS | SPORTS | 4.10 | 3387 | 25.00 | 50000 | Paid | 29.99 | Everyone | Sports |
# Collect the index labels of every app priced above 250 and remove those
# rows in place, then re-check the top of the price ranking.
indexOverprices = df_apps_clean.index[df_apps_clean["Price"] > 250]
df_apps_clean.drop(index=indexOverprices, inplace=True)
df_apps_clean.sort_values("Price", ascending=False).head(n=20)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|
| 2281 | Vargo Anesthesia Mega App | MEDICAL | 4.60 | 92 | 32.00 | 1000 | Paid | 79.99 | Everyone | Medical |
| 1407 | LTC AS Legal | MEDICAL | 4.00 | 6 | 1.30 | 100 | Paid | 39.99 | Everyone | Medical |
| 2629 | I am Rich Person | LIFESTYLE | 4.20 | 134 | 1.80 | 1000 | Paid | 37.99 | Everyone | Lifestyle |
| 2481 | A Manual of Acupuncture | MEDICAL | 3.50 | 214 | 68.00 | 1000 | Paid | 33.99 | Everyone | Medical |
| 2463 | PTA Content Master | MEDICAL | 4.20 | 64 | 41.00 | 1000 | Paid | 29.99 | Everyone | Medical |
| 2207 | EMT PASS | MEDICAL | 3.40 | 51 | 2.40 | 1000 | Paid | 29.99 | Everyone | Medical |
| 4264 | Golfshot Plus: Golf GPS | SPORTS | 4.10 | 3387 | 25.00 | 50000 | Paid | 29.99 | Everyone | Sports |
| 504 | AP Art History Flashcards | FAMILY | 5.00 | 1 | 96.00 | 10 | Paid | 29.99 | Mature 17+ | Education |
| 4772 | Human Anatomy Atlas 2018: Complete 3D Human Body | MEDICAL | 4.50 | 2921 | 25.00 | 100000 | Paid | 24.99 | Everyone | Medical |
| 3241 | Muscle Premium - Human Anatomy, Kinesiology, B... | MEDICAL | 4.20 | 168 | 25.00 | 10000 | Paid | 24.99 | Everyone | Medical |
| 2119 | NewTek NDI | PHOTOGRAPHY | 3.50 | 77 | 1.20 | 1000 | Paid | 19.99 | Everyone | Photography |
| 4470 | DRAGON QUEST VIII | FAMILY | 4.50 | 7812 | 27.00 | 50000 | Paid | 19.99 | Everyone 10+ | Role Playing |
| 2293 | Hospitalist Handbook | MEDICAL | 4.80 | 12 | 18.00 | 1000 | Paid | 19.99 | Everyone | Medical |
| 526 | USMLE Step 2 CK Flashcards | FAMILY | 5.00 | 1 | 40.00 | 10 | Paid | 19.99 | Everyone | Education |
| 2473 | boattheory.ch Full 2018 | FAMILY | 4.70 | 54 | 50.00 | 1000 | Paid | 19.40 | Everyone | Education |
| 4090 | I am Rich Premium Plus | FINANCE | 4.60 | 459 | 2.00 | 10000 | Paid | 18.99 | Everyone | Finance |
| 1508 | SkyTest BU/GU Lite | BUSINESS | 2.90 | 28 | 20.00 | 500 | Paid | 17.99 | Everyone | Business |
| 3778 | The World Ends With You | GAME | 4.60 | 4108 | 13.00 | 10000 | Paid | 17.99 | Everyone 10+ | Arcade |
| 2603 | 2017 EMRA Antibiotic Guide | MEDICAL | 4.40 | 12 | 3.80 | 1000 | Paid | 16.99 | Everyone | Medical |
| 3439 | Trine 2: Complete Story | GAME | 3.80 | 252 | 11.00 | 10000 | Paid | 16.99 | Teen | Action |
# Estimate revenue as price times install count and store it as a new
# column at position 9.
gross_revenue = df_apps_clean["Installs"] * df_apps_clean["Price"]
df_apps_clean.insert(loc=9, column='Revenue_Estimate', value=gross_revenue)
# Ten apps with the highest estimated revenue.
df_apps_clean.sort_values("Revenue_Estimate", ascending=False).head(n=10)
| App | Category | Rating | Reviews | Size_MBs | Installs | Type | Price | Content_Rating | Revenue_Estimate | Genres | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9220 | Minecraft | FAMILY | 4.50 | 2376564 | 19.00 | 10000000 | Paid | 6.99 | Everyone 10+ | 69,900,000.00 | Arcade;Action & Adventure |
| 8825 | Hitman Sniper | GAME | 4.60 | 408292 | 29.00 | 10000000 | Paid | 0.99 | Mature 17+ | 9,900,000.00 | Action |
| 7151 | Grand Theft Auto: San Andreas | GAME | 4.40 | 348962 | 26.00 | 1000000 | Paid | 6.99 | Mature 17+ | 6,990,000.00 | Action |
| 7477 | Facetune - For Free | PHOTOGRAPHY | 4.40 | 49553 | 48.00 | 1000000 | Paid | 5.99 | Everyone | 5,990,000.00 | Photography |
| 7977 | Sleep as Android Unlock | LIFESTYLE | 4.50 | 23966 | 0.85 | 1000000 | Paid | 5.99 | Everyone | 5,990,000.00 | Lifestyle |
| 6594 | DraStic DS Emulator | GAME | 4.60 | 87766 | 12.00 | 1000000 | Paid | 4.99 | Everyone | 4,990,000.00 | Action |
| 6082 | Weather Live | WEATHER | 4.50 | 76593 | 4.75 | 500000 | Paid | 5.99 | Everyone | 2,995,000.00 | Weather |
| 7954 | Bloons TD 5 | FAMILY | 4.60 | 190086 | 94.00 | 1000000 | Paid | 2.99 | Everyone | 2,990,000.00 | Strategy |
| 7633 | Five Nights at Freddy's | GAME | 4.60 | 100805 | 50.00 | 1000000 | Paid | 2.99 | Teen | 2,990,000.00 | Action |
| 6746 | Card Wars - Adventure Time | FAMILY | 4.30 | 129603 | 23.00 | 1000000 | Paid | 2.99 | Everyone 10+ | 2,990,000.00 | Card;Action & Adventure |
# Number of distinct app categories.
df_apps_clean.Category.nunique()
33
# The ten categories containing the most apps, drawn as a vertical bar chart.
top10_category = df_apps_clean["Category"].value_counts().iloc[:10]
bar = px.bar(
    x=top10_category.index,
    y=top10_category.values,
    title="Highest Competition in terms of Number of Apps",
    labels={'x': 'Category', 'y': 'Number of Apps'},
)
bar.show()
# Total installs per category, sorted ascending for the horizontal bar chart.
category_installs = (
    df_apps_clean
    .groupby('Category')
    .agg({'Installs': pd.Series.sum})
    .sort_values('Installs', ascending=True)
)
h_bar = px.bar(
    x=category_installs.Installs,
    y=category_installs.index,
    title='Category Popularity in terms of Number of Downloads',
    orientation='h',
)
h_bar.update_layout(yaxis_title='Category', xaxis_title='Number of Downloads')
h_bar.show()
# Number of apps per category, joined with the per-category install totals.
cat_number = df_apps_clean.groupby('Category').agg({'App': pd.Series.count})
cat_merged_df = cat_number.merge(category_installs, how="inner", on='Category')
print(f'The dimensions of the DataFrame are: {cat_merged_df.shape}')
# Categories with the most installs first.
cat_merged_df.sort_values('Installs', ascending=False).head(5)
The dimensions of the DataFrame are: (33, 2)
| App | Installs | |
|---|---|---|
| Category | ||
| GAME | 910 | 13858762717 |
| COMMUNICATION | 257 | 11039241530 |
| TOOLS | 719 | 8099724500 |
| PRODUCTIVITY | 301 | 5788070180 |
| SOCIAL | 203 | 5487841475 |
# Scatter of app count against total installs per category; the marker size
# follows the app count and the colour follows the installs.
scatter = px.scatter(
    cat_merged_df,
    x='App',
    y='Installs',
    size='App',
    color='Installs',
    hover_name=cat_merged_df.index,
    title='Category Concentration',
)
# Use a log scale on the y axis.
scatter.update_layout(
    xaxis_title="Number of Apps (Lower=More Concentrated)",
    yaxis_title="Installs",
    yaxis=dict(type='log'),
)
scatter.show()
# Number of distinct values in the Genres column.
df_apps_clean.Genres.nunique()
114
# Frequency of each Genres value.
df_apps_clean.Genres.value_counts()
Tools 718
Entertainment 467
Education 429
Productivity 301
Personalization 298
...
Lifestyle;Education 1
Health & Fitness;Education 1
Arcade;Pretend Play 1
Tools;Education 1
Board;Pretend Play 1
Name: Genres, Length: 114, dtype: int64
# A Genres entry may hold several genres separated by ';'. Split each entry
# into one column per genre, stack the columns into a single Series (this
# drops the empty slots of single-genre apps), and count each genre.
final_genre = (
    df_apps_clean.Genres
    .str.split(pat=";", expand=True)
    .stack()
    .value_counts()
)
final_genre.shape
(53,)
# Bar chart of the first fifteen entries of final_genre, coloured by count.
genre_bar = px.bar(
    x=final_genre.index[:15],
    y=final_genre[:15],
    color=final_genre[:15],
    color_continuous_scale="Agsunset",
    labels={'x': 'Genre', 'y': 'Number of Apps'},
    title='Top Genres',
)
# Hide the colour bar.
genre_bar.update_layout(coloraxis_showscale=False)
genre_bar.show()
# Split the apps by pricing model.
df_apps_free = df_apps_clean[df_apps_clean.Type == "Free"]
df_apps_paid = df_apps_clean[df_apps_clean.Type == "Paid"]

# Count the apps per category on each side.
df_free = df_apps_free.groupby('Category').agg({'Type': pd.Series.count})
df_paid = df_apps_paid.groupby('Category').agg({'Type': pd.Series.count})

# Outer join so that a category present on only one side (free-only or
# paid-only) is kept; the missing count is then filled with 0.
df_merged = pd.merge(df_free, df_paid, on='Category', how="outer")
# Both inputs have a 'Type' column, so merge suffixes them with _x / _y.
df_merged.rename(columns={"Type_x": "Free", "Type_y": "Paid"}, inplace=True)
df_merged.fillna(0, inplace=True)

# Two bar traces per category, one for free and one for paid apps.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=df_merged.index,
    y=df_merged.Free,
    name="Free Apps",
))
fig.add_trace(go.Bar(
    x=df_merged.index,
    y=df_merged.Paid,
    name="Paid Apps",
))
# Use a log scale on the y axis.
fig.update_layout(
    title="Free vs Paid apps by Category",
    xaxis_title="Category",
    yaxis_title="Number of Apps",
    yaxis=dict(type='log'),
)
# Order the categories by their combined bar total, largest first.
fig.update_xaxes(categoryorder='total descending')
fig.show()
# Distribution of install counts among the free apps.
df_apps_free.Installs.value_counts()
1000000 1397 100000 1014 10000000 931 10000 871 5000000 607 1000 567 500000 493 50000 417 5000 360 100 237 50000000 202 100000000 189 500 163 10 51 50 42 500000000 24 1000000000 20 5 9 1 1 Name: Installs, dtype: int64
# Notched box plot of installs for free and paid apps, with every point
# drawn and a log-scaled y axis.
df = df_apps_clean
fig = px.box(
    df,
    x="Type",
    y="Installs",
    color="Type",
    notched=True,
    points="all",
)
fig.update_layout(
    title="Number of Downloads for Free Apps vs Paid Apps",
    yaxis=dict(type='log'),
)
fig.show()
# Box plot of estimated revenue per category for paid apps, with a
# log-scaled y axis and categories ordered by their minimum value, ascending.
df = df_apps_paid
fig = px.box(df, y="Revenue_Estimate", x="Category")
fig.update_layout(
    title="Paid Apps Turnover Estimate",
    yaxis_title="Paid Apps Revenue",
    yaxis=dict(type='log'),
)
fig.update_xaxes(categoryorder='min ascending')
fig.show()
# Median price across the paid apps.
df_apps_paid.Price.median()
2.99
# Box plot of price per category for paid apps, with a log-scaled y axis
# and categories ordered by their maximum value, descending.
df = df_apps_paid
fig = px.box(df, y="Price", x="Category")
fig.update_layout(
    title="Paid Apps Prices",
    yaxis_title="Price",
    yaxis=dict(type='log'),
)
fig.update_xaxes(categoryorder='max descending')
fig.show()